Exploring patterns in macroeconomic variables across North American countries through interactive data visualization
Data cleaning and preprocessing to handle missing values, outliers, and inconsistencies
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.1
# Show the current working directory
getwd()
## [1] "C:/Users/bridget.chukwu/Documents/Summer Course STAT/STAT 714/Final Project_714"
# Read the Excel workbook into `data` and preview its first rows
data <- read_excel(
  path = "C:\\Users\\bridget.chukwu\\Documents\\Summer Course STAT\\STAT 714\\Final Project_714\\MacroVariables Data.xls"
)
head(data)
## # A tibble: 6 × 23
## Entity countryname countrycode Time year GDP GovEx `Inf` Int Tax
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 United States USA 1 2004 3.85 1.85e12 2.68 1.61 9.54
## 2 1 United States USA 2 2005 3.48 1.96e12 3.39 2.96 10.7
## 3 1 United States USA 3 2006 2.78 2.07e12 3.23 4.73 11.3
## 4 1 United States USA 4 2007 2.01 2.20e12 2.85 5.21 11.3
## 5 1 United States USA 5 2008 0.122 2.35e12 3.84 3.11 10.3
## 6 1 United States USA 6 2009 -2.60 2.43e12 -0.356 2.59 7.90
## # ℹ 13 more variables: Unem <dbl>, lnGDP <dbl>, lnGovEx <dbl>, lnInf <dbl>,
## # lnInt <dbl>, lnTax <dbl>, lnUnem <dbl>, CAN <dbl>, CRI <dbl>, DOM <dbl>,
## # HND <dbl>, MEX <dbl>, PAN <dbl>
dim(data) # Report the dimensions of the dataset (number of rows, number of columns)
## [1] 133 23
Cleaning the data
# Keep only the columns whose names are not in the drop list below
# (the Time/Entity columns, the ln-prefixed columns and the per-country columns)
data <- data[
  ,
  !is.element(
    names(data),
    c(
      "Time", "Entity",
      "lnGDP", "lnGovEx", "lnInf", "lnInt", "lnTax", "lnUnem",
      "CAN", "CRI", "DOM", "HND", "MEX", "PAN"
    )
  )
]
head(data)
## # A tibble: 6 × 9
## countryname countrycode year GDP GovEx `Inf` Int Tax Unem
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States USA 2004 3.85 1851919958016 2.68 1.61 9.54 5.53
## 2 United States USA 2005 3.48 1962030006272 3.39 2.96 10.7 5.08
## 3 United States USA 2006 2.78 2072949948416 3.23 4.73 11.3 4.62
## 4 United States USA 2007 2.01 2198530031616 2.85 5.21 11.3 4.62
## 5 United States USA 2008 0.122 2353439965184 3.84 3.11 10.3 5.78
## 6 United States USA 2009 -2.60 2433430061056 -0.356 2.59 7.90 9.25
# Check the dimensions of the dataset after the column removal
dim(data)
## [1] 133 9
# List the names of the remaining columns
colnames(data)
## [1] "countryname" "countrycode" "year" "GDP" "GovEx"
## [6] "Inf" "Int" "Tax" "Unem"
### Missing values
sum(is.na(data)) # Count the NA cells across the whole dataset
## [1] 0
# No missing values found (the count above is 0)
###
Outlier detection
# Standardize the numeric columns to z-scores.
# scale() centres each column on its mean and divides by its standard deviation,
# and always returns a numeric matrix (one row per observation).
numeric_cols <- vapply(data, is.numeric, logical(1))
z_scores <- scale(as.matrix(data[numeric_cols]))
# Identify rows with any absolute z-score greater than 3 (considered outliers).
# na.rm = TRUE means an undefined z-score (e.g. a constant column) never flags a row.
outlier_rows <- rowSums(abs(z_scores) > 3, na.rm = TRUE) > 0
# Create a dataset without the outliers
cleaned_data <- data[!outlier_rows, ]
# Ensure at least one record per country remains
country_column <- "countrycode"
unique_countries <- unique(data[[country_column]])
# Countries that lost every row to the outlier filter
missing_countries <- setdiff(unique_countries, cleaned_data[[country_column]])
if (length(missing_countries) > 0) {
  # Retain the first row of each missing country from the original data.
  # The rows are taken from `data`, the dataset being filtered, and appended in
  # a single rbind() call.
  is_first_row <- !duplicated(data[[country_column]])
  rows_to_keep <- data[is_first_row & data[[country_column]] %in% missing_countries, ]
  cleaned_data <- rbind(cleaned_data, rows_to_keep)
}
# Check the remaining dimension after removing outliers
dim(cleaned_data)
## [1] 126 9
# Store the outlier-filtered rows under a new name, final_data (data itself is left unchanged)
final_data <- cleaned_data
Data visualization - Scatter Plot
Box Plot
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Data visualization - Boxplot for Unem
# Plain boxplot of the Unem column
boxplot(final_data[["Unem"]], ylab = "Unem")

# Values that boxplot.stats() reports as outliers
out_Un <- boxplot.stats(final_data[["Unem"]])[["out"]]
# Row positions in final_data whose Unem value is one of those outliers
outUn_ind <- which(is.element(final_data[["Unem"]], out_Un))
outUn_ind
## [1] 51 52
# Redraw the boxplot with a title and print the outlier values above it
boxplot(final_data[["Unem"]], ylab = "Unem", main = "Unem")
mtext(paste("outliers: ", paste(out_Un, collapse = ", ")))

library(plotly)
library(ggplot2)
# Data visualization - Boxplot for Tax
# Plain boxplot of the Tax column
boxplot(final_data[["Tax"]], ylab = "Tax")

# Values that boxplot.stats() reports as outliers
out_Tax <- boxplot.stats(final_data[["Tax"]])[["out"]]
# Row positions in final_data whose Tax value is one of those outliers
outTax_ind <- which(is.element(final_data[["Tax"]], out_Tax))
outTax_ind
## integer(0)
# Redraw the boxplot with a title and print the outlier values above it
boxplot(final_data[["Tax"]], ylab = "Tax", main = "Tax")
mtext(paste("outliers: ", paste(out_Tax, collapse = ", ")))

library(plotly)
library(ggplot2)
# Data visualization - Boxplot for Inf
# Plain boxplot of the Inf column
boxplot(final_data[["Inf"]], ylab = "Inf")

# Values that boxplot.stats() reports as outliers
out_Inf <- boxplot.stats(final_data[["Inf"]])[["out"]]
# Row positions in final_data whose Inf value is one of those outliers
outInf_ind <- which(is.element(final_data[["Inf"]], out_Inf))
outInf_ind
## [1] 20 35 36 37 39 93
# Redraw the boxplot with a title and print the outlier values above it
boxplot(final_data[["Inf"]], ylab = "Inf", main = "Inf")
mtext(paste("outliers: ", paste(out_Inf, collapse = ", ")))

library(plotly)
library(ggplot2)
# Data visualization - Boxplot for GDP
# Plain boxplot of the GDP column
boxplot(final_data[["GDP"]], ylab = "GDP")

# Values that boxplot.stats() reports as outliers
out_GDP <- boxplot.stats(final_data[["GDP"]])[["out"]]
# Row positions in final_data whose GDP value is one of those outliers
outGDP_ind <- which(is.element(final_data[["GDP"]], out_GDP))
outGDP_ind
## [1] 6 32 33 55 56 59 68 69 75 86 94 105 106 113 124
# Redraw the boxplot with a title and print the outlier values above it
boxplot(final_data[["GDP"]], ylab = "GDP", main = "GDP")
mtext(paste("outliers: ", paste(out_GDP, collapse = ", ")))

# Data visualization - GDP by Country
# One interactive box per country code, coloured by country, legend hidden
gdp_comparison <- layout(
  plot_ly(
    final_data,
    x = ~countrycode,
    y = ~GDP,
    color = ~countrycode,
    type = "box"
  ),
  title = "GDP per capita Comparison by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "GDP"),
  showlegend = FALSE
)
gdp_comparison
# Identify outlier positions in a numeric vector using the IQR rule.
#
# x    : numeric vector; NA values are ignored when computing the quartiles and
#        are never reported as outliers.
# coef : multiplier applied to the interquartile range to set the fences
#        (default 1.5).
#
# Returns the integer positions of the elements of x that lie below
# Q1 - coef * IQR or above Q3 + coef * IQR (integer(0) when there are none).
outlier_indices <- function(x, coef = 1.5) {
  quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  # Named `spread` rather than IQR so the stats::IQR function is not shadowed
  spread <- quartiles[2] - quartiles[1]
  lower_fence <- quartiles[1] - coef * spread
  upper_fence <- quartiles[2] + coef * spread
  which(x < lower_fence | x > upper_fence)
}
# Find outlier indices for each variable
outliers_int <- outlier_indices(final_data$Int)
outliers_gdp <- outlier_indices(final_data$GDP)
outliers_tax <- outlier_indices(final_data$Tax)
outliers_unem <- outlier_indices(final_data$Unem)
# Combine all outlier indices and remove duplicates
all_outliers <- unique(c(outliers_int, outliers_gdp, outliers_tax, outliers_unem))
# Remove rows with outliers from the dataset.
# A logical mask is used instead of final_data[-all_outliers, ]: a negative index
# built from an empty vector selects zero rows, which would drop every
# observation when no outliers are found.
keep_rows <- !(seq_len(nrow(final_data)) %in% all_outliers)
final_data_clean <- final_data[keep_rows, ]
# Box plots of the four variables after the IQR-based row removal
boxplot(
  final_data_clean[["Int"]],
  main = "Interest Rate (Cleaned)",
  ylab = "Interest Rate"
)

boxplot(
  final_data_clean[["GDP"]],
  main = "GDP (Cleaned)",
  ylab = "GDP"
)

boxplot(
  final_data_clean[["Tax"]],
  main = "Tax (Cleaned)",
  ylab = "Tax"
)

boxplot(
  final_data_clean[["Unem"]],
  main = "Unemployment (Cleaned)",
  ylab = "Unemployment"
)

# Data visualization - Int (Interest) by Country
# One interactive box per country code, coloured by country, legend hidden
int_comparison <- layout(
  plot_ly(final_data, x = ~countrycode, y = ~Int, color = ~countrycode, type = "box"),
  title = "Interest rate Comparison by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Interest Rate"),
  showlegend = FALSE
)
int_comparison
# Data visualization - Unem (Unemployment) by Country
unem_comparison <- layout(
  plot_ly(final_data, x = ~countrycode, y = ~Unem, color = ~countrycode, type = "box"),
  title = "Unemployment rate Comparison by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Unemployment Rate"),
  showlegend = FALSE
)
unem_comparison
# Data visualization - Tax rate by Country
tax_comparison <- layout(
  plot_ly(final_data, x = ~countrycode, y = ~Tax, color = ~countrycode, type = "box"),
  title = "Tax Comparison by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Tax"),
  showlegend = FALSE
)
tax_comparison
Bar Chart
Visualisation of the aggregate of each variable by country, 2004 - 2022
library(plotly)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Calculate the sum of Interest rate for each country
sum1 <- aggregate(Int ~ countrycode, data = final_data, FUN = sum)
# Sort the data by the sum of Int value in descending order and keep the top 7
top_7_coun <- head(sum1[order(-sum1$Int), ], 7)
# Convert countrycode to a factor whose levels follow the sorted order.
# A plain factor() would use alphabetical levels, and the bars would then be
# drawn alphabetically instead of in descending order of the sum.
top_7_coun$countrycode <- factor(top_7_coun$countrycode,
                                 levels = top_7_coun$countrycode)
# Bar chart of the summed Int per country, with the value printed inside each bar
fig1 <- plot_ly(data = top_7_coun, x = ~countrycode, y = ~Int,
                text = ~Int, textfont = list(color = 'rgb(101, 67, 33)'), textposition = 'inside',
                type = "bar", marker = list(color = 'rgb(225, 253, 208)',
                                            line = list(color = 'rgb(101, 67, 33)',
                                                        width = 2))) %>%
  layout(title = "<b>Aggregated Interest rate values by Countries</b>",
         plot_bgcolor = 'white',
         xaxis = list(title = "<b>Countries</b>",
                      zerolinecolor = 'ffff',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'),
         yaxis = list(title = "<b>Agg.Int</b>",
                      zerolinecolor = '#CD6600',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'))
fig1
library(plotly)
library(dplyr)
# Calculate the sum of Unemployment for each country
sum1 <- aggregate(Unem ~ countrycode, data = final_data, FUN = sum)
# Sort the data by the sum of Unemployment in descending order and keep the top 7
top_7_coun <- head(sum1[order(-sum1$Unem), ], 7)
# Convert countrycode to a factor whose levels follow the sorted order.
# A plain factor() would use alphabetical levels, and the bars would then be
# drawn alphabetically instead of in descending order of the sum.
top_7_coun$countrycode <- factor(top_7_coun$countrycode,
                                 levels = top_7_coun$countrycode)
# Bar chart of the summed Unem per country, with the value printed inside each bar
fig1 <- plot_ly(data = top_7_coun, x = ~countrycode, y = ~Unem,
                text = ~Unem, textfont = list(color = 'rgb(101, 67, 33)'), textposition = 'inside',
                type = "bar", marker = list(color = 'rgb(225, 253, 208)',
                                            line = list(color = 'rgb(101, 67, 33)',
                                                        width = 2))) %>%
  layout(title = "<b>Aggregated Unemployment values by Countries</b>",
         plot_bgcolor = 'white',
         xaxis = list(title = "<b>Countries</b>",
                      zerolinecolor = 'ffff',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'),
         yaxis = list(title = "<b>Agg.Unem</b>",
                      zerolinecolor = '#CD6600',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'))
fig1
# Calculate the sum of Tax for each country
sum1 <- aggregate(Tax ~ countrycode, data = final_data, FUN = sum)
# Sort the data by the sum of Tax in descending order and keep the top 7
top_7_coun <- head(sum1[order(-sum1$Tax), ], 7)
# Convert countrycode to a factor whose levels follow the sorted order.
# A plain factor() would use alphabetical levels, and the bars would then be
# drawn alphabetically instead of in descending order of the sum.
top_7_coun$countrycode <- factor(top_7_coun$countrycode,
                                 levels = top_7_coun$countrycode)
# Plot the data using plotly, with the value printed inside each bar
fig1 <- plot_ly(data = top_7_coun, x = ~countrycode, y = ~Tax,
                text = ~Tax, textfont = list(color = 'rgb(101, 67, 33)'), textposition = 'inside',
                type = "bar", marker = list(color = 'rgb(225, 253, 208)',
                                            line = list(color = 'rgb(101, 67, 33)',
                                                        width = 2))) %>%
  layout(title = "<b>Aggregated Tax by Countries</b>",
         plot_bgcolor = 'white',
         xaxis = list(title = "<b>Countries</b>",
                      zerolinecolor = 'ffff',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'),
         yaxis = list(title = "<b>Agg.Tax</b>",
                      zerolinecolor = '#CD6600',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'))
fig1
# Calculate the sum of GDP for each country
sum1 <- aggregate(GDP ~ countrycode, data = final_data, FUN = sum)
# Sort the data by the sum of GDP in descending order and keep the top 7
top_7_coun <- head(sum1[order(-sum1$GDP), ], 7)
# Convert countrycode to a factor whose levels follow the sorted order.
# A plain factor() would use alphabetical levels, and the bars would then be
# drawn alphabetically instead of in descending order of the sum.
top_7_coun$countrycode <- factor(top_7_coun$countrycode,
                                 levels = top_7_coun$countrycode)
# Plot the data using plotly, with the value printed inside each bar
fig1 <- plot_ly(data = top_7_coun, x = ~countrycode, y = ~GDP,
                text = ~GDP, textfont = list(color = 'rgb(101, 67, 33)'), textposition = 'inside',
                type = "bar", marker = list(color = 'rgb(225, 253, 208)',
                                            line = list(color = 'rgb(101, 67, 33)',
                                                        width = 2))) %>%
  layout(title = "<b>Aggregated GDP by Countries</b>",
         plot_bgcolor = 'white',
         xaxis = list(title = "<b>Countries</b>",
                      zerolinecolor = 'ffff',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'),
         # The y axis shows the GDP sum, so it is labelled Agg.GDP
         yaxis = list(title = "<b>Agg.GDP</b>",
                      zerolinecolor = '#CD6600',
                      zerolinewidth = 2,
                      gridcolor = 'ffff'))
fig1
Scatter Plot
# Load required library
library(plotly)
# Scatter plot for GDP vs Inf
fig_1 <- plot_ly(
  data = final_data, # Specify the data source
  x = ~GDP, # X-axis: the GDP column, looked up inside final_data
  y = ~`Inf`, # Y-axis: the Inf (Inflation) column; backticks are needed because Inf is also R's infinity constant
  type = "scatter", # Stated explicitly so plotly does not have to guess the trace type
  mode = "markers", # Stated explicitly so plotly does not have to guess the scatter mode
  marker = list(
    size = 5, # Set the size of the markers
    color = "turquoise", # Set the color of the markers
    line = list(color = "gray0", width = 2) # Set the color and width of the marker borders
  )
)
fig_1 <- fig_1 %>% layout(
  title = "Change in GDP vs. Inflation", # Set the plot title
  yaxis = list(zeroline = FALSE, title = "Inflation (% change)"), # Customize the Y-axis properties
  xaxis = list(zeroline = FALSE, title = "GDP (% change)") # Customize the X-axis properties
)
fig_1 # Display the scatter plot
# Scatter plot for GDP vs Unem
fig_2 <- plot_ly(
  data = final_data, # Specify the data source
  x = ~GDP, # X-axis: the GDP column, looked up inside final_data
  y = ~Unem, # Y-axis: the Unem (Unemployment) column
  type = "scatter", # Stated explicitly so plotly does not have to guess the trace type
  mode = "markers", # Stated explicitly so plotly does not have to guess the scatter mode
  marker = list(
    size = 5, # Set the size of the markers
    color = "blue", # Set the color of the markers
    line = list(color = "gray0", width = 2) # Set the color and width of the marker borders
  )
)
fig_2 <- fig_2 %>% layout(
  title = "Change in GDP vs. Unemployment", # Set the plot title
  yaxis = list(zeroline = FALSE, title = "Unemployment (% change)"), # Customize the Y-axis properties
  xaxis = list(zeroline = FALSE, title = "GDP (% change)") # Customize the X-axis properties
)
fig_2 # Display the scatter plot
# Scatter plot for GDP vs Int
fig_3 <- plot_ly(
  data = final_data, # Specify the data source
  x = ~GDP, # X-axis: the GDP column, looked up inside final_data
  y = ~Int, # Y-axis: the Int (Interest Rate) column
  type = "scatter", # Stated explicitly so plotly does not have to guess the trace type
  mode = "markers", # Stated explicitly so plotly does not have to guess the scatter mode
  marker = list(
    size = 5, # Set the size of the markers
    color = "green", # Set the color of the markers
    line = list(color = "gray0", width = 2) # Set the color and width of the marker borders
  )
)
fig_3 <- fig_3 %>% layout(
  title = "Change in GDP vs. Interest Rate", # Set the plot title
  yaxis = list(zeroline = FALSE, title = "Interest Rate (% change)"), # Customize the Y-axis properties
  xaxis = list(zeroline = FALSE, title = "GDP (% change)") # Customize the X-axis properties
)
fig_3 # Display the scatter plot
# Scatter plot for GDP vs Tax
fig_4 <- plot_ly(
  data = final_data, # Specify the data source
  x = ~GDP, # X-axis: the GDP column, looked up inside final_data
  y = ~Tax, # Y-axis: the Tax column
  type = "scatter", # Stated explicitly so plotly does not have to guess the trace type
  mode = "markers", # Stated explicitly so plotly does not have to guess the scatter mode
  marker = list(
    size = 5, # Set the size of the markers
    color = "red", # Set the color of the markers
    line = list(color = "gray0", width = 2) # Set the color and width of the marker borders
  )
)
fig_4 <- fig_4 %>% layout(
  title = "Change in GDP vs. Tax", # Set the plot title
  yaxis = list(zeroline = FALSE, title = "Tax (% change)"), # Customize the Y-axis properties
  xaxis = list(zeroline = FALSE, title = "GDP (% change)") # Customize the X-axis properties
)
fig_4 # Display the scatter plot
Line Graphs
library(plotly)
# Line graph for Interest Rate by Country
# Points are placed at each country code and joined by a blue line; legend hidden
interest_rate_graph <- layout(
  plot_ly(
    final_data,
    x = ~countrycode,
    y = ~Int,
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'blue')
  ),
  title = "Interest Rate by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Interest Rate"),
  showlegend = FALSE
)
interest_rate_graph
library(plotly)
# Line graph for Tax by Country
# Points are placed at each country code and joined by a red line; legend hidden
tax_graph <- layout(
  plot_ly(
    final_data,
    x = ~countrycode,
    y = ~Tax,
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'red')
  ),
  title = "Tax by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Tax"),
  showlegend = FALSE
)
tax_graph
# Line graph for Unem (Unemployment) by Country
unem_graph <- layout(
  plot_ly(
    final_data,
    x = ~countrycode,
    y = ~Unem,
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'green')
  ),
  title = "Unemployment by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "Unemployment Rate"),
  showlegend = FALSE
)
unem_graph
# Line graph for GDP by Country
gdp_graph <- layout(
  plot_ly(
    final_data,
    x = ~countrycode,
    y = ~GDP,
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = 'purple')
  ),
  title = "GDP by Country",
  xaxis = list(title = "Country"),
  yaxis = list(title = "GDP"),
  showlegend = FALSE
)
gdp_graph
library(plotly)
# Trend line for Interest Rate (Int) by Year for different countries.
# One line per country code, with a custom hover label and a range slider.
# The figure width is given to plot_ly(), because specifying it in layout() is deprecated.
fig <- plot_ly(final_data, x = ~year, y = ~Int, color = ~countrycode, type = "scatter", mode = "lines+markers",
               hoverinfo = 'text',
               text = ~paste("Year: ", year, "<br>Country: ", countrycode, "<br>Interest Rate: ", round(Int, 2)),
               width = 950) %>%
  layout(showlegend = TRUE, xaxis = list(rangeslider = list(visible = TRUE)))
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig <- fig %>% config(
  scrollZoom = TRUE, editable = TRUE, staticPlot = FALSE
)
fig <- fig %>%
  layout(title = "<b>Trend in Interest Rates over Time by Country</b>",
         xaxis = list(title = "Year",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         yaxis = list(title = "Interest Rate",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         plot_bgcolor = 'lightblue')
# Display the chart
fig
# List the column names of `data`
colnames(data)
## [1] "countryname" "countrycode" "year" "GDP" "GovEx"
## [6] "Inf" "Int" "Tax" "Unem"
Line Charts - Trendlines
# Trend line for Tax by Year, one line per country code, with a custom hover
# label and a range slider.
# The figure width is given to plot_ly(), because specifying it in layout() is deprecated.
fig_tax <- plot_ly(final_data, x = ~year, y = ~Tax, type = "scatter", mode = "lines+markers",
                   color = ~countrycode, hoverinfo = 'text',
                   text = ~paste("Year: ", year, "<br>Country: ", countrycode, "<br>Tax: ", round(Tax, 2)),
                   width = 950) %>%
  layout(showlegend = TRUE, xaxis = list(rangeslider = list(visible = TRUE)))
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig_tax <- fig_tax %>% config(
  scrollZoom = TRUE, editable = TRUE, staticPlot = FALSE
)
fig_tax <- fig_tax %>%
  layout(title = "<b>Trend in Tax over Time by Country</b>",
         xaxis = list(title = "Year",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         yaxis = list(title = "Tax",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         plot_bgcolor = 'lightblue')
fig_tax
# Trend line for GDP by Year, one line per country code, with a custom hover
# label and a range slider.
# The figure width is given to plot_ly(), because specifying it in layout() is deprecated.
fig_gdp <- plot_ly(final_data, x = ~year, y = ~GDP, type = "scatter", mode = "lines+markers",
                   color = ~countrycode, hoverinfo = 'text',
                   text = ~paste("Year: ", year, "<br>Country: ", countrycode, "<br>GDP: ", round(GDP, 2)),
                   width = 950) %>%
  layout(showlegend = TRUE, xaxis = list(rangeslider = list(visible = TRUE)))
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig_gdp <- fig_gdp %>% config(
  scrollZoom = TRUE, editable = TRUE, staticPlot = FALSE
)
fig_gdp <- fig_gdp %>%
  # Title typo fixed: "per capital" -> "per capita"
  layout(title = "<b>Trend in GDP per capita over Time by Country</b>",
         xaxis = list(title = "Year",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         yaxis = list(title = "GDP",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         plot_bgcolor = 'lightblue')
fig_gdp
# Trend line for Unemployment by Year, one line per country code, with a custom
# hover label and a range slider.
# The figure width is given to plot_ly(), because specifying it in layout() is deprecated.
fig_unem <- plot_ly(final_data, x = ~year, y = ~Unem, type = "scatter", mode = "lines+markers",
                    color = ~countrycode, hoverinfo = 'text',
                    text = ~paste("Year: ", year, "<br>Country: ", countrycode, "<br>Unemployment: ", round(Unem, 2)),
                    width = 950) %>%
  layout(showlegend = TRUE, xaxis = list(rangeslider = list(visible = TRUE)))
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig_unem <- fig_unem %>% config(
  scrollZoom = TRUE, editable = TRUE, staticPlot = FALSE
)
fig_unem <- fig_unem %>%
  layout(title = "<b>Trend in Unemployment over Time by Country</b>",
         xaxis = list(title = "Year",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         yaxis = list(title = "Unemployment Rate",
                      zerolinecolor = 'white',
                      zerolinewidth = 2.5,
                      gridcolor = 'white'),
         plot_bgcolor = 'lightblue')
fig_unem
Pie Chart
# Load necessary libraries
library(plotly)
# Build a 2 x 2 grid of pie charts in a single pipeline: start from an empty
# figure, add one pie per variable (labelled by country code) and place each pie
# in its own grid cell through `domain`.
fig <- plot_ly() %>%
  # Row 0, column 0: Interest Rate (Int)
  add_pie(
    data = final_data, labels = ~countrycode, values = ~Int,
    name = "Interest Rate", domain = list(row = 0, column = 0),
    title = list(text = "Interest Rate")
  ) %>%
  # Row 0, column 1: GDP
  add_pie(
    data = final_data, labels = ~countrycode, values = ~GDP,
    name = "GDP", domain = list(row = 0, column = 1),
    title = list(text = "GDP")
  ) %>%
  # Row 1, column 0: Tax
  add_pie(
    data = final_data, labels = ~countrycode, values = ~Tax,
    name = "Tax", domain = list(row = 1, column = 0),
    title = list(text = "Tax")
  ) %>%
  # Row 1, column 1: Unemployment (Unem)
  add_pie(
    data = final_data, labels = ~countrycode, values = ~Unem,
    name = "Unemployment", domain = list(row = 1, column = 1),
    title = list(text = "Unemployment")
  ) %>%
  # Overall title, legend, and the 2 x 2 grid the domains refer to
  layout(
    title = "Pie Charts with Subplots for Various Variables", showlegend = TRUE,
    grid = list(rows = 2, columns = 2),
    xaxis = list(showgrid = FALSE, zeroline = TRUE, showticklabels = TRUE),
    yaxis = list(showgrid = FALSE, zeroline = TRUE, showticklabels = TRUE)
  )
# Display the pie chart
fig
Histogram
# Load necessary libraries
library(plotly)
# Start from an empty figure with alpha (transparency) 0.6, add one histogram
# trace per variable, and overlay the traces through barmode = "overlay".
fig <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = final_data$Int, name = "Interest Rate") %>%
  add_histogram(x = final_data$GDP, name = "GDP") %>%
  add_histogram(x = final_data$Tax, name = "Tax") %>%
  add_histogram(x = final_data$Unem, name = "Unemployment") %>%
  layout(
    barmode = "overlay",
    title = "Overlayed Histograms for Various Variables",
    xaxis = list(title = "Values"),
    yaxis = list(title = "Frequency")
  )
# Display the overlaid histogram
fig
Error Bars
# Load necessary libraries
library(plyr)
## Warning: package 'plyr' was built under R version 4.4.1
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
library(plotly)
# Define new variables
x <- final_data$Unem # Unemployment
y <- final_data$countrycode # Country code
z <- final_data$Tax # Tax
# Create a new dataset with x, y, and z variables
new_data <- data.frame(x, y, z)
# Calculate the mean output (Tax) for each combination of 'Unem' and 'countrycode'
# NOTE(review): x is the raw Unem value, so a (y, x) group probably holds a single row — confirm whether Unem was meant to be binned first
data_4_mean <- ddply(new_data, c("y", "x"), summarise, Output = mean(z, na.rm = TRUE))
# Calculate the standard deviation of output (Tax) for each combination of 'Unem' and 'countrycode'
# sd() returns NA for a group with a single value, so such groups presumably get no error bar
data_sd <- ddply(new_data, c("y", "x"), summarise, Output = sd(z, na.rm = TRUE))
# Combine the mean and standard deviation data into a new dataframe
dta_4 <- data.frame(data_4_mean, data_sd$Output)
# Rename the 'data_sd.Output' column to 'sd' for clarity
# (the named-vector form c(old = new) is the plyr-style rename call)
dta_4 <- rename(dta_4, c("data_sd.Output" = "sd"))
# Convert the 'x' column (Unemployment) to a factor for better plotting
dta_4$x <- as.factor(dta_4$x)
# Create a grouped bar plot using Plotly: the first trace (HND) defines the x/y/error_y
# mappings, and each add_trace() below only swaps in the rows of another country
fig <- plot_ly(data = dta_4[which(dta_4$y == "HND"),], x = ~x, y = ~Output, type = "bar", name = "HND",
error_y = ~list(array = sd, color = "black"))
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "DOM"),], name = "DOM")
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "CRI"),], name = "CRI")
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "PAN"),], name = "PAN")
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "CAN"),], name = "CAN")
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "MEX"),], name = "MEX")
fig <- fig %>% add_trace(data = dta_4[which(dta_4$y == "USA"),], name = "USA")
# Customize layout
fig <- fig %>% layout(title = "Mean Tax by Unemployment and Country with Standard Deviation",
xaxis = list(title = "Unemployment"),
yaxis = list(title = "Mean Tax"),
barmode = "group")
# Display the grouped bar plot
fig
Violin Plots
###violin plots
# Load necessary library
library(plotly)
# One violin trace per variable, with year on the x-axis; each violin also shows
# its inner box and mean line.
fig_int <- plot_ly(
  final_data,
  x = ~year, y = ~Int, name = 'Interest Rate', type = 'violin',
  box = list(visible = TRUE), meanline = list(visible = TRUE)
)
fig_gdp <- plot_ly(
  final_data,
  x = ~year, y = ~GDP, name = 'GDP', type = 'violin',
  box = list(visible = TRUE), meanline = list(visible = TRUE)
)
fig_tax <- plot_ly(
  final_data,
  x = ~year, y = ~Tax, name = 'Tax', type = 'violin',
  box = list(visible = TRUE), meanline = list(visible = TRUE)
)
fig_unem <- plot_ly(
  final_data,
  x = ~year, y = ~Unem, name = 'Unemployment', type = 'violin',
  box = list(visible = TRUE), meanline = list(visible = TRUE)
)
# Arrange the four violins in two rows with shared axes, then set the title and
# axis labels on the combined figure
fig <- layout(
  subplot(fig_int, fig_gdp, fig_tax, fig_unem, nrows = 2, shareX = TRUE, shareY = TRUE),
  title = "Violin Plots for Various Metrics by Year",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Values", zeroline = FALSE)
)
# Display the subplot
fig
Contour Plot
# Define x and y variables
x <- final_data$GDP # GDP as the x variable
y <- final_data$Unem # Unemployment as the y variable
# Create a 2D density contour plot using plot_ly with x and y
fig <- plot_ly(x = x, y = y, type = "histogram2dcontour")
# Set layout options for the plot
layout_options <- list(
  showlegend = TRUE,
  margin = list(l = 80, r = 50, b = 50, t = 50),
  xaxis = list(title = "GDP"),
  yaxis = list(title = "Unemployment"),
  title = list(text = "2D Density Contour Plot of GDP vs. Unemployment")
)
# Update the layout options for the plot.
# layout() reads its options from named arguments, so the list is spliced in with
# do.call(); passing the list itself as one unnamed argument leaves the options unapplied.
fig <- do.call(layout, c(list(fig), layout_options))
# Display the plot
fig
# Define x and y variables for Unemployment and Tax
x <- final_data$Unem # Unemployment as the x variable
y <- final_data$Tax # Tax as the y variable
# Create a 2D density contour plot using plot_ly with x and y
fig_unem_tax <- plot_ly(x = x, y = y, type = "histogram2dcontour")
# Set layout options for the plot
layout_options_unem_tax <- list(
  showlegend = FALSE,
  margin = list(l = 80, r = 50, b = 50, t = 50),
  xaxis = list(title = "Unemployment"),
  yaxis = list(title = "Tax"),
  title = list(text = "2D Density Contour Plot of Unemployment vs. Tax")
)
# Update the layout options for the plot.
# layout() reads its options from named arguments, so the list is spliced in with
# do.call(); passing the list itself as one unnamed argument leaves the options unapplied.
fig_unem_tax <- do.call(layout, c(list(fig_unem_tax), layout_options_unem_tax))
# Display the plot
fig_unem_tax
# Define x and y variables for Unemployment and Interest Rate
x <- final_data$Unem # Unemployment as the x variable
y <- final_data$Int # Interest Rate as the y variable
# Create a 2D density contour plot using plot_ly with x and y
fig_unem_int <- plot_ly(x = x, y = y, type = "histogram2dcontour")
# Set layout options for the plot
layout_options_unem_int <- list(
  showlegend = FALSE,
  margin = list(l = 80, r = 50, b = 50, t = 50),
  xaxis = list(title = "Unemployment"),
  yaxis = list(title = "Interest Rate"),
  title = list(text = "2D Density Contour Plot of Unemployment vs. Interest Rate")
)
# Update the layout options for the plot.
# layout() reads its options from named arguments, so the list is spliced in with
# do.call(); passing the list itself as one unnamed argument leaves the options unapplied.
fig_unem_int <- do.call(layout, c(list(fig_unem_int), layout_options_unem_int))
# Display the plot
fig_unem_int
Heatmap
# Load necessary library
library(plotly)
# Subset the data to include only the relevant columns
data_subset <- final_data[, c("Int", "GDP", "Tax", "Unem", "Inf")]
# Calculate the correlation matrix
cor_matrix <- cor(data_subset, use = "complete.obs") # use = "complete.obs" to handle NA values
# Create the heatmap; the hover text shows each correlation rounded to 2 decimals
fig <- plot_ly(
  z = cor_matrix,
  type = "heatmap",
  colorscale = "Viridis", # You can choose a different colorscale if desired
  x = colnames(cor_matrix),
  y = colnames(cor_matrix),
  text = round(cor_matrix, 2),
  hoverinfo = "text"
)
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig <- fig %>% config(
  scrollZoom = TRUE, editable = TRUE, staticPlot = FALSE
)
# Set axis labels and title
fig <- fig %>% layout(
  xaxis = list(title = "Variables"),
  yaxis = list(title = "Variables"),
  title = "Correlation Heatmap: Economic Indicators"
)
# Display the heatmap
fig
Bubble Chart
# Load necessary library
library(plotly)
# Keep only the columns used by the bubble chart: GDP, Unemployment, and Interest Rate
fuel1 <- final_data %>% select(GDP, Unem, Int)
# Create bubble chart: GDP on x, Unem on y, bubble size mapped to Int.
# `sizes` (the range the size mapping is scaled to) is an argument of plot_ly()
# itself, so it is passed there rather than inside `marker`.
fig <- plot_ly(fuel1, x = ~GDP, y = ~Unem, type = "scatter", mode = "markers",
               color = I('blue'), size = ~Int, sizes = c(5, 20),
               marker = list(sizemode = "area", opacity = 0.5),
               text = ~paste("Interest Rate: ", Int),
               hoverinfo = "text") %>%
  layout(title = "GDP vs. Unemployment (Bubble Chart)",
         xaxis = list(title = "GDP"),
         yaxis = list(title = "Unemployment"))
# Add interactive features.
# config() only accepts its documented attributes: the option is spelled
# `staticPlot`, and `selectdirection` is not a config option, so it is not passed.
fig <- fig %>% config(
  # Allow zooming with the mouse wheel / scroll gesture
  scrollZoom = TRUE,
  # Turn on plotly's in-chart editing mode
  editable = TRUE,
  # Keep the chart interactive (not a static image)
  staticPlot = FALSE
)
# Display the interactive plot
fig
## Warning: `line.width` does not currently support multiple values.